{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 编写网络爬虫，从东方财富网站上抓取贵州茅台[600519]的历史业绩报表\n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. 查找数据源，分析数据源\n",
    "\n",
    "    - 打开东财网站，查找包含贵州茅台[600519]业绩报表的网页。\n",
    "    \n",
    "    - 打开浏览器的开发者工具[F12]，分析Network中每个网络请求的Name以及Response，获取返回业务报表数据的请求地址。\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. 编写代码，获取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导入第三方库\n",
    "\n",
    "import json\n",
    "import urllib3\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 从网页中提取获取数据的URL\n",
    "# URL中的参数可能会发生变化，同学们需要自己分析、提取URL。\n",
    "url = 'http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?' \\\n",
    "      'type=YJBB20_YJBB&token=70f12f2f4f091e459a279469fe49eca5&st=reportdate&sr=-1' \\\n",
    "      '&filter=(scode={0})&p={page}&ps={pageSize}&js={\"pages\":(tp),\"data\":%20(x)}'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 创建网络请求连接池\n",
    "conn_pool = urllib3.PoolManager()\n",
    "\n",
    "# 设置要抓取数据的股票代码\n",
    "code = '600519'\n",
    "\n",
    "# 发送网络请求，获取返回结果\n",
    "response = conn_pool.request('GET', url.replace('{0}', code))\n",
    "\n",
    "# 对返回结果中的数据进行解码\n",
    "result = response.data.decode('UTF-8')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. 提取数据，并且保存到文件中"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'scode': '600519',\n",
       "  'sname': '贵州茅台',\n",
       "  'securitytype': 'A股',\n",
       "  'trademarket': '上交所主板',\n",
       "  'latestnoticedate': '2019-10-16T00:00:00',\n",
       "  'reportdate': '2019-09-30T00:00:00',\n",
       "  'basiceps': 24.24,\n",
       "  'cutbasiceps': '-',\n",
       "  'totaloperatereve': 63508663046.7,\n",
       "  'ystz': 15.5349,\n",
       "  'yshz': 19.4939,\n",
       "  'parentnetprofit': 30454855385.44,\n",
       "  'sjltz': 23.1317,\n",
       "  'sjlhz': 20.3244,\n",
       "  'roeweighted': 24.92,\n",
       "  'bps': 99.71217639,\n",
       "  'mgjyxjje': 21.744507,\n",
       "  'xsmll': 91.4894,\n",
       "  'publishname': '酿酒行业',\n",
       "  'assigndscrpt': '-',\n",
       "  'gxl': '-',\n",
       "  'securitytypecode': '058001001',\n",
       "  'trademarketcode': '069001001001',\n",
       "  'firstnoticedate': '2019-10-16T00:00:00'},\n",
       " {'scode': '600519',\n",
       "  'sname': '贵州茅台',\n",
       "  'securitytype': 'A股',\n",
       "  'trademarket': '上交所主板',\n",
       "  'latestnoticedate': '2019-07-18T00:00:00',\n",
       "  'reportdate': '2019-06-30T00:00:00',\n",
       "  'basiceps': 15.88,\n",
       "  'cutbasiceps': 15.95,\n",
       "  'totaloperatereve': 41172681309.94,\n",
       "  'ystz': 16.7971,\n",
       "  'yshz': -16.8518,\n",
       "  'parentnetprofit': 19951025609.22,\n",
       "  'sjltz': 26.5592,\n",
       "  'sjlhz': -22.2061,\n",
       "  'roeweighted': 16.21,\n",
       "  'bps': 91.35097485,\n",
       "  'mgjyxjje': 19.174465,\n",
       "  'xsmll': 91.8668,\n",
       "  'publishname': '酿酒行业',\n",
       "  'assigndscrpt': '不分配不转增',\n",
       "  'gxl': '-',\n",
       "  'securitytypecode': '058001001',\n",
       "  'trademarketcode': '069001001001',\n",
       "  'firstnoticedate': '2019-07-18T00:00:00'}]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 解析抓取结果\n",
    "result = json.loads(result)\n",
    "\n",
    "# 取出业绩报表相关的数据\n",
    "reports = result['data']\n",
    "\n",
    "# 显示最近的两个季度的业绩报表\n",
    "reports[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>assigndscrpt</th>\n",
       "      <th>basiceps</th>\n",
       "      <th>bps</th>\n",
       "      <th>cutbasiceps</th>\n",
       "      <th>firstnoticedate</th>\n",
       "      <th>gxl</th>\n",
       "      <th>latestnoticedate</th>\n",
       "      <th>mgjyxjje</th>\n",
       "      <th>parentnetprofit</th>\n",
       "      <th>publishname</th>\n",
       "      <th>...</th>\n",
       "      <th>securitytypecode</th>\n",
       "      <th>sjlhz</th>\n",
       "      <th>sjltz</th>\n",
       "      <th>sname</th>\n",
       "      <th>totaloperatereve</th>\n",
       "      <th>trademarket</th>\n",
       "      <th>trademarketcode</th>\n",
       "      <th>xsmll</th>\n",
       "      <th>yshz</th>\n",
       "      <th>ystz</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-</td>\n",
       "      <td>24.24</td>\n",
       "      <td>99.712176</td>\n",
       "      <td>-</td>\n",
       "      <td>2019-10-16T00:00:00</td>\n",
       "      <td>-</td>\n",
       "      <td>2019-10-16T00:00:00</td>\n",
       "      <td>21.744507</td>\n",
       "      <td>3.045486e+10</td>\n",
       "      <td>酿酒行业</td>\n",
       "      <td>...</td>\n",
       "      <td>058001001</td>\n",
       "      <td>20.3244</td>\n",
       "      <td>23.1317</td>\n",
       "      <td>贵州茅台</td>\n",
       "      <td>6.350866e+10</td>\n",
       "      <td>上交所主板</td>\n",
       "      <td>069001001001</td>\n",
       "      <td>91.4894</td>\n",
       "      <td>19.4939</td>\n",
       "      <td>15.5349</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>不分配不转增</td>\n",
       "      <td>15.88</td>\n",
       "      <td>91.350975</td>\n",
       "      <td>15.95</td>\n",
       "      <td>2019-07-18T00:00:00</td>\n",
       "      <td>-</td>\n",
       "      <td>2019-07-18T00:00:00</td>\n",
       "      <td>19.174465</td>\n",
       "      <td>1.995103e+10</td>\n",
       "      <td>酿酒行业</td>\n",
       "      <td>...</td>\n",
       "      <td>058001001</td>\n",
       "      <td>-22.2061</td>\n",
       "      <td>26.5592</td>\n",
       "      <td>贵州茅台</td>\n",
       "      <td>4.117268e+10</td>\n",
       "      <td>上交所主板</td>\n",
       "      <td>069001001001</td>\n",
       "      <td>91.8668</td>\n",
       "      <td>-16.8518</td>\n",
       "      <td>16.7971</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  assigndscrpt  basiceps        bps cutbasiceps      firstnoticedate gxl  \\\n",
       "0            -     24.24  99.712176           -  2019-10-16T00:00:00   -   \n",
       "1       不分配不转增     15.88  91.350975       15.95  2019-07-18T00:00:00   -   \n",
       "\n",
       "      latestnoticedate   mgjyxjje  parentnetprofit publishname   ...     \\\n",
       "0  2019-10-16T00:00:00  21.744507     3.045486e+10        酿酒行业   ...      \n",
       "1  2019-07-18T00:00:00  19.174465     1.995103e+10        酿酒行业   ...      \n",
       "\n",
       "  securitytypecode    sjlhz    sjltz sname totaloperatereve  trademarket  \\\n",
       "0        058001001  20.3244  23.1317  贵州茅台     6.350866e+10        上交所主板   \n",
       "1        058001001 -22.2061  26.5592  贵州茅台     4.117268e+10        上交所主板   \n",
       "\n",
       "   trademarketcode    xsmll     yshz     ystz  \n",
       "0     069001001001  91.4894  19.4939  15.5349  \n",
       "1     069001001001  91.8668 -16.8518  16.7971  \n",
       "\n",
       "[2 rows x 24 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 转换成DataFrame数据类型，保存到csv文件中\n",
    "reports = pd.DataFrame(reports)\n",
    "\n",
    "# 显示最近两个季度的数据\n",
    "reports[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 将提取的数据保存到文件中\n",
    "\n",
    "reports.to_csv(code+'.csv',encoding='utf-8-sig')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
